---
title: Title
keywords: fastai
sidebar: home_sidebar
nb_path: "nbs/Fun_with_manual_convs.ipynb"
---
{% raw %}
{% endraw %} {% raw %}
from PIL import Image
import torchvision.transforms.functional as TF
from torchvision import transforms
import torch as t
import torch.nn.functional as F
from fastai.vision.data import get_grid
import matplotlib.pyplot as plt
import glob
from PIL.JpegImagePlugin import JpegImageFile
import numpy as np
from einops import rearrange
{% endraw %}

Preparation

{% raw %}
    
# Collect every JPEG that lives in a `conv_test` directory anywhere below the
# working directory. glob makes no promise about the order of its results, so
# the paths are sorted to keep the batch order (and therefore every figure in
# this notebook) stable across runs and machines.
image_paths = sorted(glob.glob('./**/conv_test/*.jpg', recursive=True))

# Fail here with a clear message instead of later inside t.stack([]).
assert image_paths, "no images matched './**/conv_test/*.jpg'"

images = [Image.open(path) for path in image_paths]
plot_images(images)
{% endraw %} {% raw %}
# Target layout of one colour sample: channels, height, width.
c, h, w = 3, 300, 400

# Convert to a tensor, resize with the single int `w`, then centre-crop to
# exactly (h, w) so that all images share one shape and can be stacked.
image_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Resize(w), transforms.CenterCrop((h, w))]
)

# XC ("C" for Colour): one transformed tensor per image, stacked on a new
# leading batch dimension.
XC = t.stack(list(map(image_transform, images)))
plot_images(XC)
{% endraw %} {% raw %}
# Target layout of one grayscale sample: channels, height, width.
c, h, w = 1, 300, 400

# Same pipeline as the colour batch, with a Grayscale conversion applied to the
# image before it is turned into a tensor.
image_transform = transforms.Compose(
    [
        transforms.Grayscale(),
        transforms.ToTensor(),
        transforms.Resize(w),
        transforms.CenterCrop((h, w)),
    ]
)

# XG ("G" for Gray): one transformed tensor per image, stacked on a new
# leading batch dimension.
XG = t.stack(list(map(image_transform, images)))
plot_images(XG)
{% endraw %}

Famous Filters

Sobel Filter

{% raw %}
# Sobel kernels: Gx_w differences the left and right columns, Gy_w differences
# the top and bottom rows.
Gx_w = [[-1., 0., +1.],
        [-2., 0., +2.],
        [-1., 0., +1.]]
Gy_w = [[+1., +2., +1.],
        [0.0, 0.0, 0.0],
        [-1., -2., -1.]]

# Filter the grayscale batch with each kernel.
out_x, out_y = conv2d(XG, Gx_w), conv2d(XG, Gy_w)

# Gradient magnitude: sqrt(Gx^2 + Gy^2), element-wise.
output = (out_x.pow(2) + out_y.pow(2)).sqrt()
plot_images(output)
{% endraw %} {% raw %}
# Sobel responses for the colour batch; padding=1 is passed through to the
# convolution for the 3x3 kernels.
out_x = conv2d(XC, Gx_w, padding=1)
out_y = conv2d(XC, Gy_w, padding=1)

# Gradient magnitude, element-wise.
out = t.sqrt(t.pow(out_x, 2) + t.pow(out_y, 2))

# Scale every (image, channel) plane by its own maximum before plotting.
# The batch size and channel count are read from the tensor instead of being
# hard-coded as 4 and 3, so the cell works for any number of input images.
n_img, n_ch = out.shape[:2]
plane_max = out.view(n_img, n_ch, -1).max(2)[0].reshape(n_img, n_ch, 1, 1)
plot_images(out / plane_max)
{% endraw %}

Box blur

{% raw %}
# Box blur: an n x n kernel in which every weight is 1/n^2.
n = 15
box_weight = 1 / n**2
W = [[box_weight] * n for _ in range(n)]

out = conv2d(XG, W)
plot_images(out)
{% endraw %} {% raw %}
# Apply the same n x n box-blur kernel W (built in the previous cell) to the
# colour batch and show the result.
out = conv2d(XC, W)
plot_images(out)
{% endraw %}

Sharpen

{% raw %}
# Sharpen kernel: centre weight 5, the four direct neighbours -1, corners 0.
W = [[0.0, -1., 0.0],
     [-1., 5.0, -1.],
     [0.0, -1., 0.0]]

# Filter the grayscale batch and show the result.
out = conv2d(XG, W)
plot_images(out)
{% endraw %} {% raw %}
# Apply the sharpen kernel W (defined in the previous cell) to the colour batch.
# The recorded output shows matplotlib's "Clipping input data to the valid
# range for imshow" warning once per image, i.e. the sharpened values leave
# the [0, 1] range and are clipped for display.
out = conv2d(XC, W)
plot_images(out)
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
Clipping input data to the valid range for imshow with RGB data ([0..1] for floats or [0..255] for integers).
{% endraw %}

Edge Detection

{% raw %}
# Edge-detection kernel: centre weight 8 surrounded by eight -1 weights.
W = [[-1., -1., -1.],
     [-1., 8.0, -1.],
     [-1., -1., -1.]]

# Filter the grayscale batch and show the result.
out = conv2d(XG, W)
plot_images(out)
{% endraw %} {% raw %}
# Colour counterpart of the previous cell. Every other filter section runs the
# kernel on XG and then on XC; this cell repeated XG, so it only reproduced the
# grayscale figure. It now uses the colour batch.
out = conv2d(XC, W)
plot_images(out)
{% endraw %}

Separability Map

Integral Map

{% raw %}
def integral_image(X): # For batch
    """Return the integral image (summed-area table) of `X`.

    The cumulative sum is taken over the last two dimensions only, so each
    leading index (e.g. image and channel of a batch) is processed on its own.
    The earlier version also accumulated along dim 1, which mixed the colour
    channels of a multi-channel batch into each other, and summed a plain 2-D
    input along one axis only.

    Args:
        X: tensor with at least two dimensions; the last two are summed over.

    Returns:
        A new tensor with the same shape as `X`; `X` itself is not modified.

    Raises:
        ValueError: if `X` has fewer than two dimensions.
    """
    if X.ndim < 2:
        raise ValueError(
            f"integral_image needs at least 2 dimensions, got {X.ndim}")
    # Accumulate down the second-to-last dim, then across the last dim.
    return X.cumsum(-2).cumsum(-1)

# IG: integral image (cumulative sums) of the grayscale batch XG.
IG = integral_image(XG)
plot_images(IG)
{% endraw %}

Rectangular Separability Filter

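The rectangular separability filter is made from 3 smaller rectangles stacked vertically: a central rectangle (R1) with one outer rectangle (R2) above it and one below it.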

{% raw %}
# Scratch check of the shape of P2. P2 is only assigned inside the separability
# loop in a later cell, so this cell depends on that loop having been run
# first. NOTE(review): fails on a fresh Restart & Run All.
P2.shape
torch.Size([4, 1, 301, 401])
{% endraw %} {% raw %}
 # Scratch check: shape of `out` after resizing it to XG's spatial size,
 # shown next to the shape of `out` itself.
 TF.resize(out, XG.shape[-2:]).shape, out.shape
(torch.Size([4, 1, 300, 400]), torch.Size([4, 1, 300, 400]))
{% endraw %} {% raw %}
def plot_filter(W, title=''):
    """Display a filter kernel as an image.

    Args:
        W: the kernel to show; it is handed to `plt.imshow` unchanged.
        title: text placed above the image (empty by default).
    """
    plt.imshow(W)
    plt.title(title)
    plt.show()

# Multi-scale rectangular separability map of the grayscale batch.
#
# For every scale i a filter of 4*i rows by i columns is built from three
# stacked rectangles (outer R2, centre R1, outer R2). The ratio Sb / St is
# computed per pixel and the running element-wise maximum over all scales is
# kept in `out`.
target_hw = XG.shape[-2:]
img_h, img_w = target_hw

# Largest scale whose 4*i x i filter still fits inside the image. The previous
# bound (h * w, about 120k scales) requested kernels far larger than the image
# and never finished; the run had to be interrupted by hand.
max_scale = min(img_h // 4, img_w)

# St values at or below this threshold are treated as "no variance".
eps = 1e-8

out = t.zeros(XG.shape)
for i in range(1, max_scale + 1):
    t1, t2, t3 = i, i, i

    # R1: centre rectangle, 2*t1 rows by t3 columns, weights sum to 1.
    # R2: one outer rectangle, t2 rows by t3 columns, weights sum to 1.
    R1 = np.asarray([[1/t3/t1/2]*t3]*t1*2)
    R2 = np.asarray([[1/t2/t3]*t3]*t2)
    N1 = R1.size
    N2 = R2.size
    N = N1 + N2
    # NOTE(review): N2 counts a single outer rectangle although the filter
    # contains two of them, and the weights of P2_w / Pbar_w below sum to 2 / 3
    # unless conv2d rescales them (it has a normalize_weights argument whose
    # behaviour is not visible here). Confirm this is the intended weighting.

    # The same padding is used for every convolution at this scale.
    pad = (t1 + t2, t3 // 2)

    # Response of the centre rectangle only.
    P1_w = np.vstack((R2*0, R1, R2*0))
    P1 = conv2d(XG, P1_w, padding=pad)

    # Response of the two outer rectangles only.
    P2_w = np.vstack((R2, R1*0, R2))
    P2 = conv2d(XG, P2_w, padding=pad)

    # Response of the whole filter, for the image and for the squared image.
    Pbar_w = np.vstack((R2, R1, R2))
    Pbar = conv2d(XG, Pbar_w, padding=pad)
    Pbar2 = conv2d(t.pow(XG, 2), Pbar_w, padding=pad)

    # Between-region term and total term of the separability ratio.
    Sb = (N1/N) * t.pow((P1 - Pbar), 2) + (N2/N) * t.pow(P2 - Pbar, 2)
    St = Pbar2 - t.pow(Pbar, 2)

    # Where St is (numerically) zero, Sb / St is NaN or inf. t.maximum
    # propagates NaN, so a single bad scale would blank that pixel for good;
    # such positions contribute 0 instead.
    ratio = t.where(St > eps, Sb / St, t.zeros_like(Sb))

    # The convolution output can be one pixel larger than the input, so it is
    # resized back to the input's spatial size before being merged.
    out = t.maximum(out, TF.resize(ratio, target_hw))

plot_images(out)
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-135-317876021a49> in <module>
     29     Pbar_w=  np.vstack((R2, R1, R2))
     30     Pbar = conv2d(XG, Pbar_w, padding=(t1+t2, t3//2))
---> 31     Pbar2 = conv2d(t.pow(XG, 2), Pbar_w, padding=(t1+t2, t3//2))
     32     # plot_images(Pbar)
     33 

<ipython-input-52-7779bc3285fd> in conv2d(X, W, normalize_weights, **kwargs)
      5     h, w = W.shape[-2:]
      6     W = W.view(1, 1, h, w).repeat(c,1,1,1)
----> 7     return F.conv2d(X, W, groups=c, **kwargs)
      8 
      9 def plot_images(images):

KeyboardInterrupt: 
{% endraw %} {% raw %}
# Show whatever has been accumulated in `out` so far.
# NOTE(review): the recorded output below ("474") does not look like the
# result of this call and is probably stale; re-run to confirm.
plot_images(out)
474
{% endraw %} {% raw %}
# Recompute the separability terms from the variables left behind by the last
# loop iteration that ran (N1, N2, N, P1, P2, Pbar, Pbar2), plotting each stage.

# Between-region term.
Sb = (N1/N) * (P1 - Pbar).pow(2) + (N2/N) * (P2 - Pbar).pow(2)
plot_images(Sb)

# Total term.
St = Pbar2 - Pbar.pow(2)
plot_images(St)

# Plain ratio for this single scale; no guard against St == 0 here.
out = Sb / St
plot_images(out)
{% endraw %} {% raw %}
# Print the raw values of `out`; the recorded output shows NaN along the
# borders of every image.
out
tensor([[[[   nan,    nan,    nan,  ...,    nan,    nan,    nan],
          [   nan, 0.0067, 0.0136,  ..., 0.0136, 0.0067,    nan],
          [   nan, 0.0136, 0.0278,  ..., 0.0278, 0.0136,    nan],
          ...,
          [   nan, 0.0129, 0.0129,  ..., 0.0274, 0.0135,    nan],
          [   nan, 0.0067, 0.0067,  ..., 0.0136, 0.0067,    nan],
          [   nan,    nan,    nan,  ...,    nan,    nan,    nan]]],


        [[[   nan,    nan,    nan,  ...,    nan,    nan,    nan],
          [   nan, 0.0067, 0.0136,  ..., 0.0136, 0.0067,    nan],
          [   nan, 0.0136, 0.0278,  ..., 0.0278, 0.0136,    nan],
          ...,
          [   nan, 0.0136, 0.0278,  ..., 0.0275, 0.0136,    nan],
          [   nan, 0.0067, 0.0136,  ..., 0.0136, 0.0067,    nan],
          [   nan,    nan,    nan,  ...,    nan,    nan,    nan]]],


        [[[   nan,    nan,    nan,  ...,    nan,    nan,    nan],
          [   nan, 0.0067, 0.0136,  ..., 0.0136, 0.0067,    nan],
          [   nan, 0.0136, 0.0278,  ..., 0.0278, 0.0136,    nan],
          ...,
          [   nan, 0.0136, 0.0278,  ..., 0.0278, 0.0136,    nan],
          [   nan, 0.0067, 0.0136,  ..., 0.0136, 0.0067,    nan],
          [   nan,    nan,    nan,  ...,    nan,    nan,    nan]]],


        [[[   nan,    nan,    nan,  ...,    nan,    nan,    nan],
          [   nan, 0.0067, 0.0136,  ..., 0.0136, 0.0067,    nan],
          [   nan, 0.0136, 0.0278,  ..., 0.0278, 0.0136,    nan],
          ...,
          [   nan, 0.0136, 0.0278,  ..., 0.0278, 0.0136,    nan],
          [   nan, 0.0067, 0.0136,  ..., 0.0136, 0.0067,    nan],
          [   nan,    nan,    nan,  ...,    nan,    nan,    nan]]]])
{% endraw %} {% raw %}
def integral_image(X): # For batch
    """Return the integral image (summed-area table) of `X`.

    The cumulative sum is taken over the last two dimensions only, so each
    leading index (e.g. image and channel of a batch) is processed on its own.
    The earlier version also accumulated along dim 1, which mixed the colour
    channels of a multi-channel batch into each other, and summed a plain 2-D
    input along one axis only.

    Args:
        X: tensor with at least two dimensions; the last two are summed over.

    Returns:
        A new tensor with the same shape as `X`; `X` itself is not modified.

    Raises:
        ValueError: if `X` has fewer than two dimensions.
    """
    if X.ndim < 2:
        raise ValueError(
            f"integral_image needs at least 2 dimensions, got {X.ndim}")
    # Accumulate down the second-to-last dim, then across the last dim.
    return X.cumsum(-2).cumsum(-1)

# IC: integral image of the colour batch, scaled in place by its global
# maximum before plotting.
IC = integral_image(XC)
ic_peak = IC.numpy().max()
IC /= ic_peak
plot_images(IC)
{% endraw %}